package lia.tools; /** * Copyright Manning Publications Co. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific lan */ import java.io.FileWriter; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder; import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter; import org.apache.lucene.search.vectorhighlight.FieldQuery; import org.apache.lucene.search.vectorhighlight.FragListBuilder; import org.apache.lucene.search.vectorhighlight.FragmentsBuilder; import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder; import org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; // From chapter 8 public class FastVectorHighlighterSample { static final String[] DOCS = { // #A "the quick brown fox jumps over the lazy dog", // #A "the quick gold fox jumped over the lazy black dog", // #A "the quick fox jumps over the black dog", // #A "the red fox jumped over the lazy dark gray dog" // #A }; static final String QUERY = "quick OR fox OR \"lazy dog\"~1"; // #B static final String F = "f"; static Directory dir = new RAMDirectory(); static Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: FastVectorHighlighterSample <filename>"); System.exit(-1); } makeIndex(); searchIndex(args[0]); } static void makeIndex() throws IOException { IndexWriter writer = new IndexWriter(dir, analyzer, true, MaxFieldLength.UNLIMITED); for(String d : DOCS){ Document doc = new Document(); doc.add(new Field(F, d, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.addDocument(doc); } writer.close(); } static void searchIndex(String filename) throws Exception { QueryParser parser = new QueryParser(Version.LUCENE_30, F, analyzer); Query query = parser.parse(QUERY); FastVectorHighlighter highlighter = getHighlighter(); // #C FieldQuery fieldQuery = highlighter.getFieldQuery(query); // #D IndexSearcher searcher = new IndexSearcher(dir); TopDocs docs = searcher.search(query, 10); FileWriter writer = new FileWriter(filename); writer.write("<html>"); writer.write("<body>"); writer.write("<p>QUERY : " + QUERY + "</p>"); for(ScoreDoc scoreDoc : docs.scoreDocs) { String snippet = highlighter.getBestFragment( // #E fieldQuery, searcher.getIndexReader(), // #E scoreDoc.doc, F, 100 ); // #E if (snippet != null) { writer.write(scoreDoc.doc + " : " + snippet + "<br/>"); } } writer.write("</body></html>"); writer.close(); searcher.close(); } static FastVectorHighlighter getHighlighter() { FragListBuilder fragListBuilder = new SimpleFragListBuilder(); // #F FragmentsBuilder fragmentBuilder = // #F new ScoreOrderFragmentsBuilder( // #F BaseFragmentsBuilder.COLORED_PRE_TAGS, // #F BaseFragmentsBuilder.COLORED_POST_TAGS); // #F return new FastVectorHighlighter(true, true, // #F fragListBuilder, fragmentBuilder); // #F } } /* #A Index these documents #B Run this query #C Get FastVectorHighlighter #D Create FieldQuery #E Highlight top fragment #F Create FastVectorHighlighter */